---
title: "Fairweather Cosmo: Cleaning and Merging"
output: html_notebook
---

```{r packages}

library(pacman)

p_load(haven, tidyverse, countrycode, psych, conflicted)

conflicts_prefer(psych::alpha)
conflicts_prefer(dplyr::filter)
conflicts_prefer(dplyr::summarize)

```

# Migration Data

```{r mig-data}


# Read the migration file, add an ISO3C key, and give the two change
# measures shorter names
migration_data <- read_dta("Global Migration Data.dta") |>
  mutate(
    iso3c = countrycode(
      sourcevar = ccode,
      origin = "cown",
      destination = "iso3c",
      # code 345 is assigned to SRB by hand
      custom_match = c("345" = "SRB")
    )
  ) |>
  rename(
    mig_change = migchange_2015_2020,
    refugee_change = ref_2015_2020
  )

# Slim copy holding only the merge key and the two change measures
migration_data_slim <- select(
  migration_data,
  iso3c, mig_change, refugee_change
)



```

# Other Country Level Vars

```{r}


# Read the additional country-level variables, key them by ISO3C, and
# keep only the rows for 2019
other_vars <- read_dta("more_country_vars.dta") |>
  mutate(
    iso3c = countrycode(
      sourcevar = ccode,
      origin = "cown",
      destination = "iso3c",
      # code 345 is assigned to SRB by hand
      custom_match = c("345" = "SRB")
    )
  ) |>
  filter(year == 2019) |>
  # unemp becomes unemp_country so it stays distinct from the
  # respondent-level unemp created in the recoding chunk
  select(iso3c, unemp_country = unemp, pop, ln_pop, gdp, ln_gdp, growth)

```

# UN GDP per Capita Data

```{r}

# Read the UN GDP-per-capita file and reduce it to an ISO3C key plus the
# logged value
un_gdp_dat <- read_csv("un_data_gdp_cap.csv") |>
  mutate(
    # Shorten Bolivia's long-form name before matching on country name
    country = if_else(
      country == "Bolivia (Plurinational State of)",
      "Bolivia",
      country
    ),
    # ISO3C key used for merging
    iso3c = countrycode(
      country,
      origin = "country.name",
      destination = "iso3c"
    ),
    # Natural log of the value column
    ln_gdppc = log(value)
  ) |>
  select(iso3c, ln_gdppc)


```


# Load WVS and Merge

```{r}

# Read the WVS Wave 7 cross-national file
wvs_data <- read_dta("WVS_Cross-National_Wave_7_Stata_v5_0.dta")


# Copy the country alpha code into iso3c so the survey rows carry the same
# key name as the country-level tables
wvs_data <- wvs_data %>%
  mutate(iso3c = B_COUNTRY_ALPHA)

# Attach the country-level tables to every respondent row.
# - by = "iso3c" names the key explicitly, so the join no longer depends on
#   whichever column names the two tables happen to share.
# - relationship = "many-to-one" makes the join stop with an error if a
#   country table has more than one row for a matched iso3c, instead of
#   silently duplicating respondent rows.
merged_data <- wvs_data %>%
  left_join(migration_data_slim,
            by = "iso3c", relationship = "many-to-one") %>%
  left_join(other_vars,
            by = "iso3c", relationship = "many-to-one") %>%
  left_join(un_gdp_dat,
            by = "iso3c", relationship = "many-to-one")


```

# Recoding 

```{r recoding}

# Recode the survey items into analysis variables.
# case_match() returns NA for any input value that is not listed, so every
# code not spelled out below ends up as NA in the recoded variable.
merged_data <- merged_data %>%
  mutate(

    # closeness to town (Q255): reversed so 1 -> 3 ... 4 -> 0
    close_town = case_match(Q255,
                            1 ~ 3,
                            2 ~ 2,
                            3 ~ 1,
                            4 ~ 0),
    # closeness to region (Q256): same reversal
    close_region = case_match(Q256,
                              1 ~ 3,
                              2 ~ 2,
                              3 ~ 1,
                              4 ~ 0),
    # closeness to country (Q257): same reversal
    close_country = case_match(Q257,
                               1 ~ 3,
                               2 ~ 2,
                               3 ~ 1,
                               4 ~ 0),
    # closeness to continent (Q258): same reversal
    close_continent = case_match(Q258,
                                 1 ~ 3,
                                 2 ~ 2,
                                 3 ~ 1,
                                 4 ~ 0),
    # closeness to world (Q259): same reversal
    close_world = case_match(Q259,
                             1 ~ 3,
                             2 ~ 2,
                             3 ~ 1,
                             4 ~ 0),

    # national pride (Q254): reversed so 1 -> 4 ... 5 -> 0
    natpride = case_match(Q254,
                          5 ~ 0,
                          4 ~ 1,
                          3 ~ 2,
                          2 ~ 3,
                          1 ~ 4),

    # urban/rural (H_URBRURAL): 1 -> 1, 2 -> 0
    urban = case_match(H_URBRURAL,
                       2 ~ 0,
                       1 ~ 1),

    # fight for country (Q151): 1 -> 1, 2 -> 0
    fight_for_country = case_match(Q151,
                                   1 ~ 1,
                                   2 ~ 0),

    # imm on development (Q121): five codes collapsed to 0/1/2
    imm_development = case_match(Q121,
                                 1 ~ 0,
                                 2 ~ 0,
                                 3 ~ 1,
                                 4 ~ 2,
                                 5 ~ 2),
    # imm on diversity (Q123): kept in its original 0/1/2 direction
    imm_diversity = case_match(Q123,
                               0 ~ 0,
                               1 ~ 1,
                               2 ~ 2),
    # imm on crime (Q124): reversed (2 -> 0, 0 -> 2)
    imm_crimerate = case_match(Q124,
                               2 ~ 0,
                               1 ~ 1,
                               0 ~ 2),
    # imm on terror (Q126): reversed
    imm_terrorism = case_match(Q126,
                               2 ~ 0,
                               1 ~ 1,
                               0 ~ 2),
    # imm on unemployment (Q128): reversed
    imm_unemployment = case_match(Q128,
                                  2 ~ 0,
                                  1 ~ 1,
                                  0 ~ 2),
    # imm on socconflict (Q129): reversed
    imm_socconflict = case_match(Q129,
                                 2 ~ 0,
                                 1 ~ 1,
                                 0 ~ 2),

    # generalized trust (Q61): reversed so 1 -> 3 ... 4 -> 0
    trust_generalized = case_match(Q61,
                                   4 ~ 0,
                                   3 ~ 1,
                                   2 ~ 2,
                                   1 ~ 3),

    # female (Q260): 2 -> 1, 1 -> 0
    female = case_match(Q260,
                        2 ~ 1,
                        1 ~ 0),

    # edu (Q275): negative codes become NA, other values kept as-is
    edu = if_else(Q275 < 0, NA_integer_, Q275),

    # unemployed (Q279): code 7 -> 1, codes 1-6 and 8-10 -> 0
    unemp = case_match(Q279,
                       7 ~ 1,
                       c(1:6, 8:10) ~ 0),

    # income (Q288): values below 1 become NA
    income_lvl = if_else(Q288 < 1, NA_integer_, Q288),

    # age (Q262): values below 16 become NA
    age = if_else(Q262 < 16, NA_integer_, Q262),

    # country string
    country_str = as_factor(B_COUNTRY),

    # year
    year = as.numeric(A_YEAR),

    # citizen (Q269) as a two-level factor
    citizen = factor(case_match(Q269,
                                1 ~ "Citizen",
                                2 ~ "Not Citizen")),

    # trust people of other nationality (Q63): reversed, kept on 1-4
    trust_other_nat = case_match(Q63,
                                 1 ~ 4,
                                 2 ~ 3,
                                 3 ~ 2,
                                 4 ~ 1),

    # other race neighbor item (Q19): 1 -> 0, 2 -> 1
    diff_race_neigh = case_match(Q19,
                                 1 ~ 0,
                                 2 ~ 1),

    # other lang neighbor item (Q26): 1 -> 0, 2 -> 1
    diff_lang_neigh = case_match(Q26,
                                 1 ~ 0,
                                 2 ~ 1),

    # left-right (Q240): negative codes become NA so they cannot enter
    # means or models as if they were scale positions.
    # NOTE(review): assumes negative Q240 values are missing-data codes,
    # as the edu recode above treats them for Q275 - confirm in codebook.
    lr = if_else(Q240 < 0, NA_integer_, Q240)

    )


# Names of the eight news-source items, Q201 through Q208
news_vars <- paste0("Q", 201:208)

# Flip each item so larger numbers mean more frequent use (results go to
# news_Q201 ... news_Q208), then add the eight flipped items and divide
# by eight. A missing value on any item makes both the total and the
# average NA for that respondent.
merged_data <- merged_data |>
  mutate(
    across(
      all_of(news_vars),
      \(item) case_match(
        item,
        1 ~ 4, # Daily
        2 ~ 3, # Weekly
        3 ~ 2, # Monthly
        4 ~ 1, # Less than Monthly
        5 ~ 0  # Never
      ),
      .names = "news_{.col}"
    )
  ) |>
  mutate(
    total_news = (
      news_Q201 + news_Q202 + news_Q203 + news_Q204 +
        news_Q205 + news_Q206 + news_Q207 + news_Q208
    ),
    news_consumption = total_news / 8
  )


```



# Country Means of Cosmo and Nationalism

```{r}

# Per-country averages of closeness to the world (mean_cosmo) and national
# pride (mean_pride); respondents with NA on an item are left out of that
# item's average.
country_means <- merged_data |>
  group_by(iso3c) |>
  summarize(
    # TRUE is spelled out because T is an ordinary variable that can be
    # reassigned
    mean_cosmo = mean(close_world, na.rm = TRUE),
    mean_pride = mean(natpride, na.rm = TRUE)
  )

# Attach the country averages to every respondent row, joining explicitly
# on the country key
merged_data <- merged_data |>
  left_join(country_means, by = "iso3c")


```

# Create Immigration Index and Z-Scores

```{r}

# Immigration index: average of the six 0-2 immigration items, then
# rescaled from the 0-2 range to 0-100. Any NA item makes the index NA.
merged_data <- merged_data |>
  mutate(
    imm_index = (
      imm_development + imm_diversity + imm_crimerate +
        imm_terrorism + imm_unemployment + imm_socconflict
    ) / 6,
    imm_index = (imm_index / 2) * 100
  )


# Within-respondent standardisation of the closeness items.
# Step 1 (row by row): spread and centre of each respondent's answers,
#   once over all five items and once over country/continent/world only.
# Step 2 (vectorised): express close_world and close_country as distances
#   from the respondent's own mean in units of their own sd.
merged_data <- merged_data |>
  rowwise() |>
  mutate(
    # sd of all five items
    close_sd = sd(c_across(c(
      close_town, close_region, close_country, close_continent, close_world
    ))),
    # sd of country, continent, and world
    close_sd2 = sd(c_across(c(
      close_country, close_continent, close_world
    ))),
    # mean of all five items
    close_mean = mean(c_across(c(
      close_town, close_region, close_country, close_continent, close_world
    ))),
    # mean of country, continent, and world
    close_mean2 = mean(c_across(c(
      close_country, close_continent, close_world
    )))
  ) |>
  ungroup() |>
  mutate(
    # close world, relative to all five items; a respondent with no
    # spread in their answers gets 0 instead of a division by zero
    close_world_z = if_else(
      close_sd != 0,
      (close_world - close_mean) / close_sd,
      0
    ),
    # close country, relative to country/continent/world only; same
    # zero-spread rule
    close_country_z = if_else(
      close_sd2 != 0,
      (close_country - close_mean2) / close_sd2,
      0
    )
  )


```


# Keep Countries and Save

```{r}

# ISO3C codes of the countries kept in the regional extract
latam <- c(
  "ARG", "BOL", "BRA", "CHL", "COL",
  "ECU", "GTM", "HND", "MEX", "NIC",
  "PAN", "PRY", "PER", "URY", "VEN"
)

# Respondents whose country code is in the list above
latam_data <- filter(merged_data, iso3c %in% latam)

# Write the regional extract first, then the full merged file
write_csv(latam_data, "wvs_latam.csv")
write_csv(merged_data, "wvs_all.csv")



```









